from pyspark import SparkConf,SparkContext
import os
# Tell PySpark which Python interpreter to use.
# NOTE: this is an absolute, machine-specific path; adjust it per environment.
os.environ["PYSPARK_PYTHON"] = "D:/Python3.10.7/python.exe"

# Run Spark in local mode ("local[*]") under the application name "My App".
conf = SparkConf().setMaster("local[*]").setAppName("My App")
sc = SparkContext(conf=conf)

try:
    f = sc.parallelize(["a1", "a2", "a3", "b1", "b2", "b3"])
    # filter: keep only the elements that start with "a"
    f = f.filter(lambda x: x.startswith("a"))
    print(f.collect())  # ['a1', 'a2', 'a3']
finally:
    # Always release the SparkContext, even if the job above raises;
    # otherwise a failure would leave the context running.
    sc.stop()